package hfy

import org.apache.spark.{SparkConf, SparkContext}

/** Demonstrates `groupByKey`: groups a pair RDD by key and prints each key
  * with the number of values grouped under it.
  *
  * NOTE: for pure counting, `reduceByKey(_ + _)` on `(k, 1)` pairs is cheaper
  * than `groupByKey` (avoids shuffling every value); `groupByKey` is kept here
  * because illustrating it is the point of this example.
  */
object Groupbykey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Groupbykey").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val pairs = sc.parallelize(List(('a', 1), ('a', 2), ('b', 1), ('c', 1), ('c', 1)))

      // Shuffle all values for each key into one Iterable per key.
      val grouped = pairs.groupByKey()

      // mapValues keeps the key untouched (and preserves the partitioner),
      // so we only need to express the per-key transformation: value count.
      grouped.mapValues(_.size).collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if a job above fails.
      sc.stop()
    }
  }
}
